In [1]:
from msdas import *
from msdas import yeast
%pylab inline


Couldn't import dot_parser, loading of dot files will not be possible.
Populating the interactive namespace from numpy and matplotlib

In [2]:
from easydev import gsf

In [3]:
# Resolve the full path of the Yeast_all_raw.csv data file via easydev's gsf
# helper (the load log shows it resolving under msdas/share/data).
filename = gsf("msdas", "data", "Yeast_all_raw.csv")

In [4]:
# Load the raw CSV into a Replicates object. The log emitted by this call
# reports the clean-up performed while reading: zeros replaced with NAs and
# rows with ambiguous psites / protein names removed.
# verbose=True presumably enables that log -- TODO confirm.
rep = replicates.Replicates(filename, verbose=True)


INFO:root:Reading /home/cokelaer/Work/github/msdas/share/data/Yeast_all_raw.csv
INFO:root:Renaming psites with ^ character
INFO:root:Replacing zeros with NAs
INFO:root:-- 200 rows have ambiguous psites and are removed
INFO:root:save data in attribute _ambiguous_psites_df
INFO:root:--------------------------------------------------
INFO:root:-- Removing 125 rows with ambigous protein names:
INFO:root:--------------------------------------------------
WARNING:root:Rebuilding identifier in the dataframe. MERGED prefixes will be lost
WARNING:root:Identifiers are not unique. Have you called merge_peptides() ?

In [5]:
# The three commented-out lines below are kept for reference; presumably they
# show how the annotations pickle was originally generated -- TODO confirm.
# a = annotations.Annotations(rep, 'YEAST', verbose=True)
# a.set_annotations()
# a.to_pickle(tag='all')
# Read the annotations from the pickle file located in the msdas data directory.
rep.read_annotations(gsf('msdas', 'data', 'YEAST_annotations_all.pkl'))

In [7]:
# Merge peptides. The loader warned that identifiers are not unique and asked
# whether merge_peptides() had been called; this is that call.
rep.merge_peptides()


INFO:root:Merging ambiguous peptides (different peptides but same psites
INFO:root:Merging 5016 rows into 5016 groups.
INFO:root:Merged 0 groups (with more than 1 peptide) into 0 new rows.
INFO:root:New data frame has 5016 rows
WARNING:root:Rebuilding identifier in the dataframe. MERGED prefixes will be lost
INFO:root:Merging identical peptides (but with psites at different locations
INFO:root:grouping in progress
INFO:root:Merging 5016 rows into 3511 groups.
INFO:root:Merged 3511 groups (with more than 1 peptide) into 0 new rows.
WARNING:root:Rebuilding identifier in the dataframe. MERGED prefixes will be lost

In [8]:
# If you already have an instance of MassSpecReader, you can build the
# replicates directly from it, as follows:
# r = readers.MassSpecReader(filename, merge_peptides=True)
# Replicates is a child of MassSpecReader, so you can also pass an existing
# Replicates instance:
rep = replicates.Replicates(rep)

In [9]:
# Histogram of the coefficient of variation (per the method name; presumably
# computed across replicates -- TODO confirm). The return value is kept in `data`.
data = rep.hist_coefficient_variation(normed=True, color="red")



In [10]:
# List the metadata columns (identifier, protein, sequence, psite, ...).
rep.metadata.columns


Out[10]:
Index([u'Identifier', u'Protein', u'Sequence', u'Sequence_Phospho', u'Psite',
       u'Entry', u'Entry_name'],
      dtype='object')

In [11]:
# List the measurement columns of the generic Replicates object
# (108 columns with the raw names, e.g. Ma0s04_0, Ma0s04_0.1, ...).
rep.measurements.columns


Out[11]:
Index([u'Ma0s04_0', u'Ma0s04_0.1', u'Ma0s04_0.2', u'Ma0s04_1', u'Ma0s04_1.1',
       u'Ma0s04_1.2', u'Ma0s04_5', u'Ma0s04_5.1', u'Ma0s04_5.2', u'Ma0s04_10', 
       ...
       u'Ma45s04_5.2', u'Ma45s04_10', u'Ma45s04_10.1', u'Ma45s04_10.2',
       u'Ma45s04_20', u'Ma45s04_20.1', u'Ma45s04_20.2', u'Ma45s04_45',
       u'Ma45s04_45.1', u'Ma45s04_45.2'],
      dtype='object', length=108)

In [12]:
# Re-wrap the data in the yeast-specific ReplicatesYeast class. The measurement
# column listings before and after this cell differ: raw names such as Ma0s04_0
# are replaced by shorter ones such as a0_t0.
rep = replicates.ReplicatesYeast(rep)

In [13]:
# List the measurement columns of the ReplicatesYeast object
# (still 108 columns, now named a0_t0, a0_t0.1, ...).
rep.measurements.columns


Out[13]:
Index([u'a0_t0', u'a0_t0.1', u'a0_t0.2', u'a0_t1', u'a0_t1.1', u'a0_t1.2',
       u'a0_t5', u'a0_t5.1', u'a0_t5.2', u'a0_t10', 
       ...
       u'a45_t5.2', u'a45_t10', u'a45_t10.1', u'a45_t10.2', u'a45_t20',
       u'a45_t20.1', u'a45_t20.2', u'a45_t45', u'a45_t45.1', u'a45_t45.2'],
      dtype='object', length=108)

In [14]:
# Draw the box plot provided by the ReplicatesYeast object
# (what exactly is plotted is defined in msdas, not visible here).
rep.boxplot()



In [15]:
# Plot restricted to the a0_t0 experiment; presumably mean (mu) against
# standard deviation (sigma) over its replicates -- TODO confirm.
rep.plot_mu_sigma(["a0_t0"])


NA studies


In [16]:
rep.hist_na_per_experiments()
# There are 108 experiments (counting the replicates); they form the x-axis.



In [17]:
# Colour map of the NA (missing value) pattern; the returned object is kept in `na`.
# sort_index / noxticks presumably sort the rows and hide the x tick labels -- TODO confirm.
na = rep.pcolor_na(sort_index=True, noxticks=True)



In [18]:
# Plot of the NAs per experiment (per the method name).
rep.plot_na_per_experiment()


Get the errors


In [19]:
# Build a dataframe with one column per condition (a0_t0, a0_t1, ...). The row
# displayed afterwards has no .1/.2 replicate suffixes, so the replicates are
# presumably averaged into a single value per condition -- TODO confirm.
mu = rep.get_mu_df()

In [20]:
# Show the first row of the per-condition table next to the identifier of the
# matching row of the underlying dataframe.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
# The displayed Series is named 0, i.e. the row is selected by integer label 0,
# which is exactly what `.ix[0]` did on an integer index.
# NOTE(review): assumes rep.df shares that integer index -- confirm.
mu.loc[0], rep.df.loc[0].Identifier


Out[20]:
(a0_t0      3.816629e+08
 a0_t1      3.624692e+08
 a0_t5      3.745957e+08
 a0_t10     3.442398e+08
 a0_t20     3.590273e+08
 a0_t45     3.779120e+08
 a1_t0      3.885887e+08
 a1_t1      3.987150e+08
 a1_t5      3.704923e+08
 a1_t10     3.253585e+08
 a1_t20     3.313587e+08
 a1_t45     3.782939e+08
 a5_t0      3.045297e+08
 a5_t1      3.403139e+08
 a5_t5      2.927528e+08
 a5_t10     3.366247e+08
 a5_t20     3.474529e+08
 a5_t45     4.133215e+08
 a10_t0     2.844253e+08
 a10_t1     3.477076e+08
 a10_t5     3.113154e+08
 a10_t10    3.050076e+08
 a10_t20    2.805034e+08
 a10_t45    2.682643e+08
 a20_t0     3.153507e+08
 a20_t1     3.124522e+08
 a20_t5     3.128767e+08
 a20_t10    3.271161e+08
 a20_t20    3.109574e+08
 a20_t45    3.456041e+08
 a45_t0     9.941186e+07
 a45_t1     9.534324e+07
 a45_t5     9.634498e+07
 a45_t10    1.209264e+08
 a45_t20    1.111171e+08
 a45_t45    9.902624e+07
 Name: 0, dtype: float64, 'ABF1_S720')

In [ ]: